mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
aarch64: Model zero-high-half semantics of [SU]QXTN instructions
Split the aarch64_<su>qmovn<mode> pattern into separate scalar and vector
variants. Further split the vector RTL pattern into big/little endian
variants that model the zero-high-half semantics of the underlying
instruction. Modeling these semantics allows for better RTL combinations
while also removing some register allocation issues as the compiler now
knows that the operation is totally destructive.

Add new tests to narrow_zero_high_half.c to verify the benefit of this
change.

gcc/ChangeLog:

2021-06-14  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd-builtins.def: Split generator
	for aarch64_<su>qmovn builtins into scalar and vector variants.
	* config/aarch64/aarch64-simd.md (aarch64_<su>qmovn<mode>_insn_le):
	Define.
	(aarch64_<su>qmovn<mode>_insn_be): Define.
	(aarch64_<su>qmovn<mode>): Split into scalar and vector
	variants. Change vector variant to an expander that emits the
	correct instruction depending on endianness.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.
This commit is contained in:
@@ -271,8 +271,10 @@
   BUILTIN_VQN (BINOP_UUS, sqxtun2, 0, NONE)
 
   /* Implemented by aarch64_<su>qmovn<mode>.  */
-  BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, NONE)
-  BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, NONE)
+  BUILTIN_VQN (UNOP, sqmovn, 0, NONE)
+  BUILTIN_SD_HSDI (UNOP, sqmovn, 0, NONE)
+  BUILTIN_VQN (UNOP, uqmovn, 0, NONE)
+  BUILTIN_SD_HSDI (UNOP, uqmovn, 0, NONE)
 
   /* Implemented by aarch64_<su>qxtn2<mode>.  */
   BUILTIN_VQN (BINOP, sqxtn2, 0, NONE)
@@ -4875,10 +4875,54 @@
 (define_insn "aarch64_<su>qmovn<mode>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
 	(SAT_TRUNC:<VNARROWQ>
-	  (match_operand:VSQN_HSDI 1 "register_operand" "w")))]
+	  (match_operand:SD_HSDI 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
+(define_insn "aarch64_<su>qmovn<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (SAT_TRUNC:<VNARROWQ>
+	    (match_operand:VQN 1 "register_operand" "w"))
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<su>qmovn<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+	  (SAT_TRUNC:<VNARROWQ>
+	    (match_operand:VQN 1 "register_operand" "w"))))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<su>qmovn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(SAT_TRUNC:<VNARROWQ>
+	  (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate.  */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						 <VNARROWQ2>mode));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>qxtn2<mode>_le"
@@ -67,6 +67,13 @@ TEST_UNARY (vqmovun, uint8x16_t, int16x8_t, s16, u8)
 TEST_UNARY (vqmovun, uint16x8_t, int32x4_t, s32, u16)
 TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32)
 
+TEST_UNARY (vqmovn, int8x16_t, int16x8_t, s16, s8)
+TEST_UNARY (vqmovn, int16x8_t, int32x4_t, s32, s16)
+TEST_UNARY (vqmovn, int32x4_t, int64x2_t, s64, s32)
+TEST_UNARY (vqmovn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_UNARY (vqmovn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_UNARY (vqmovn, uint32x4_t, uint64x2_t, u64, u32)
+
 /* { dg-final { scan-assembler-not "dup\\t" } } */
 
 /* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */
@@ -79,3 +86,5 @@ TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32)
 /* { dg-final { scan-assembler-times "\\tsqrshrun\\tv" 3} } */
 /* { dg-final { scan-assembler-times "\\txtn\\tv" 6} } */
 /* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */
+/* { dg-final { scan-assembler-times "\\tuqxtn\\tv" 3} } */
+/* { dg-final { scan-assembler-times "\\tsqxtn\\tv" 3} } */
Reference in New Issue
Block a user