aarch64: Use VNx16BI for floating-point svcmp*

This patch continues the work of making ACLE intrinsics use VNx16BI
for svbool_t results.  It deals with the floating-point forms of svcmp*.

gcc/
	* config/aarch64/aarch64-sve.md (@aarch64_pred_fcm<cmp_op><mode>_acle)
	(*aarch64_pred_fcm<cmp_op><mode>_acle, @aarch64_pred_fcmuo<mode>_acle)
	(*aarch64_pred_fcmuo<mode>_acle): New patterns.
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svcmp_impl::expand, svcmpuo_impl::expand): Use them.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/general/cmpeq_6.c: New test.
	* gcc.target/aarch64/sve/acle/general/cmpge_9.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/cmpgt_9.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/cmple_9.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/cmplt_9.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/cmpne_5.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/cmpuo_1.c: Likewise.
This commit is contained in:
Richard Sandiford
2025-08-04 11:45:33 +01:00
parent 690586e796
commit 2cf2cc8183
9 changed files with 802 additions and 2 deletions

View File

@@ -500,7 +500,7 @@ public:
return e.use_exact_insn (code_for_aarch64_pred_cmp_acle (code, mode));
}
insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
insn_code icode = code_for_aarch64_pred_fcm_acle (m_unspec_for_fp, mode);
return e.use_exact_insn (icode);
}
@@ -581,7 +581,8 @@ public:
expand (function_expander &e) const override
{
e.add_ptrue_hint (0, e.gp_mode (0));
return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
auto mode = e.vector_mode (0);
return e.use_exact_insn (code_for_aarch64_pred_fcmuo_acle (mode));
}
};

View File

@@ -9012,6 +9012,43 @@
}
)
;; Expander for predicated floating-point comparisons whose result is a
;; VNx16BI rather than a <VPRED>.  The <VPRED> comparison result is viewed as
;; a VNx16BI through a subreg at offset 0 and then ANDed with operand 5.
;; Operand 5 is not passed in by the caller: the preparation statement below
;; sets it to aarch64_ptrue_all for the element size of <MODE>.
(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand")
(and:VNx16BI
(subreg:VNx16BI
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(match_operand:SVE_F 3 "register_operand")
(match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
SVE_COND_FP_CMP_I0)
0)
(match_dup 5)))]
"TARGET_SVE"
{
operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
}
)
;; Instruction form of the VNx16BI floating-point comparison: the <VPRED>
;; comparison result, viewed as a VNx16BI, ANDed with operand 5.  Operand 5
;; must satisfy aarch64_ptrue_all_operand and is not referenced by either
;; output template, so the AND costs no extra instruction.  The first
;; alternative compares against zero (constraint Dz) and prints #0.0; the
;; second compares two vector registers.
(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand")
(and:VNx16BI
(subreg:VNx16BI
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(match_operand:SVE_F 3 "register_operand")
(match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
SVE_COND_FP_CMP_I0)
0)
(match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 3 , 4 ]
[ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
[ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
}
)
;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
@@ -9025,6 +9062,40 @@
"fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
;; Expander for the predicated unordered comparison (UNSPEC_COND_FCMUO) with
;; a VNx16BI result.  The <VPRED> comparison result is viewed as a VNx16BI
;; through a subreg at offset 0 and then ANDed with operand 5.  Unlike the
;; other comparisons, operand 4 must be a register (no zero alternative).
;; Operand 5 is filled in by the preparation statement with aarch64_ptrue_all
;; for the element size of <MODE>.
(define_expand "@aarch64_pred_fcmuo<mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand")
(and:VNx16BI
(subreg:VNx16BI
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(match_operand:SVE_F 3 "register_operand")
(match_operand:SVE_F 4 "register_operand")]
UNSPEC_COND_FCMUO)
0)
(match_dup 5)))]
"TARGET_SVE"
{
operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
}
)
;; Instruction form of the VNx16BI unordered comparison: the <VPRED> result
;; of UNSPEC_COND_FCMUO, viewed as a VNx16BI, ANDed with operand 5.  Operand 5
;; must satisfy aarch64_ptrue_all_operand and is not referenced by the output
;; template, so only the FCMUO itself is emitted.  There is a single
;; register-register alternative.
(define_insn "*aarch64_pred_fcmuo<mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(and:VNx16BI
(subreg:VNx16BI
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(match_operand:SVE_F 3 "register_operand" "w")
(match_operand:SVE_F 4 "register_operand" "w")]
UNSPEC_COND_FCMUO)
0)
(match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
"TARGET_SVE"
"fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P. This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmeq p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmpeq, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMEQ on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmpeq (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmeq p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpeq, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMEQ on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpeq (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmeq p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpeq, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMEQ on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpeq (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmeq p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpeq, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMEQ on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpeq (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmeq p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpeq, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMEQ on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpeq (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmeq p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpeq, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMEQ on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpeq (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmeq p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpeq, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMEQ on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpeq (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmge p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmpge, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGE on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmpge (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmge p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpge, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpge (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmge p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpge, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpge (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmge p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpge, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpge (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmge p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpge, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMGE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpge (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmge p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpge, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpge (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmge p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpge, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMGE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpge (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmgt p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmpgt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGT on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmpgt (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmgt p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpgt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGT on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpgt (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmgt p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpgt, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGT on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpgt (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmgt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpgt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpgt (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmgt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpgt, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMGT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpgt (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmgt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpgt, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMGT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpgt (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmgt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpgt, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMGT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpgt (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmle p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmple, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLE on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmple (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmle p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmple, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmple (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmle p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmple, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmple (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmle p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmple, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmple (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmle p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmple, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMLE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmple (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmle p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmple, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmple (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmle p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmple, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMLE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmple (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmlt p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmplt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLT on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmplt (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmlt p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmplt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLT on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmplt (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmlt p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmplt, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLT on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmplt (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmlt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmplt, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmplt (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmlt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmplt, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMLT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmplt (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmlt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmplt, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMLT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmplt (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmlt p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmplt, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMLT on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmplt (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmne p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmpne, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMNE on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmpne (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmne p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpne, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMNE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpne (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmne p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpne, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMNE on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpne (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmne p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpne, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMNE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpne (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmne p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpne, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMNE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpne (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmne p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpne, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMNE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpne (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmne p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpne, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMNE on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpne (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,104 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test1:
** fcmuo p0\.h, p0/z, z0\.h, z1\.h
** ret
*/
/* 16-bit float svcmpuo, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMUO on .h elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test1 (svbool_t pg, svfloat16_t x, svfloat16_t y)
{
return svand_z (svptrue_b8 (),
svcmpuo (pg, x, y),
svptrue_b16 ());
}
/*
** test2:
** fcmuo p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpuo, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMUO on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test2 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpuo (pg, x, y),
svptrue_b16 ());
}
/*
** test3:
** fcmuo p0\.s, p0/z, z0\.s, z1\.s
** ret
*/
/* 32-bit float svcmpuo, ANDed with svptrue_b32 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMUO on .s elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test3 (svbool_t pg, svfloat32_t x, svfloat32_t y)
{
return svand_z (svptrue_b8 (),
svcmpuo (pg, x, y),
svptrue_b32 ());
}
/*
** test4:
** fcmuo p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpuo, ANDed with svptrue_b16 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMUO on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test4 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpuo (pg, x, y),
svptrue_b16 ());
}
/*
** test5:
** fcmuo p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpuo, ANDed with svptrue_b8 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMUO on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test5 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpuo (pg, x, y),
svptrue_b8 ());
}
/*
** test6:
** fcmuo p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpuo, ANDed with svptrue_b64 under a svptrue_b8 governing
   predicate.  The expected body above is a lone FCMUO on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test6 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b8 (),
svcmpuo (pg, x, y),
svptrue_b64 ());
}
/*
** test7:
** fcmuo p0\.d, p0/z, z0\.d, z1\.d
** ret
*/
/* 64-bit float svcmpuo, ANDed with svptrue_b64 under a svptrue_b32 governing
   predicate.  The expected body above is a lone FCMUO on .d elements, so
   the svand_z is expected to generate no code.  */
svbool_t
test7 (svbool_t pg, svfloat64_t x, svfloat64_t y)
{
return svand_z (svptrue_b32 (),
svcmpuo (pg, x, y),
svptrue_b64 ());
}
#ifdef __cplusplus
}
#endif