mirror of
https://github.com/gcc-mirror/gcc.git
synced 2026-05-06 14:59:39 +02:00
AArch64: Use anchors for vector constants [PR 121240]
Enable anchors for vector constants - like FP, expand vector constants early
and place them in the constdata section.  Avoid unnecessary loads by expanding
simple cases using DUP.  Performance on SPECFP2017 is ~0.3% better, codesize
increases by 0.05% due to extra const data.

gcc:
	PR target/121240
	* config/aarch64/aarch64-simd.md (mov<mode>): Expand vector constants early.
	* config/aarch64/aarch64.cc (aarch64_select_rtx_section): Force vector
	immediates <= 16 bytes to constdata.

gcc/testsuite:
	PR target/121240
	* gcc.target/aarch64/const_create_using_fmov.c: Fix test.
	* gcc.target/aarch64/pr121240.c: Add new test.
	* gcc.target/aarch64/vec-init-single-const.c: Fix test.
	* gcc.target/aarch64/vect-cse-codegen.c: Fix test.
This commit is contained in:
@@ -70,11 +70,32 @@
|
||||
contains CONST_POLY_INTs), build it up from individual elements instead.
|
||||
We should only need to do this before RA; aarch64_legitimate_constant_p
|
||||
should ensure that we don't try to rematerialize the constant later. */
|
||||
if (GET_CODE (operands[1]) == CONST_VECTOR
|
||||
&& targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
|
||||
if (GET_CODE (operands[1]) == CONST_VECTOR)
|
||||
{
|
||||
aarch64_expand_vector_init (operands[0], operands[1]);
|
||||
DONE;
|
||||
if (targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
|
||||
{
|
||||
aarch64_expand_vector_init (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
else if (!aarch64_simd_imm_zero (operands[1], <MODE>mode)
|
||||
&& !aarch64_simd_special_constant_p (operands[1], <MODE>mode)
|
||||
&& !aarch64_simd_valid_mov_imm (operands[1]))
|
||||
{
|
||||
rtx x;
|
||||
/* Expand into VDUP. */
|
||||
if (TARGET_SIMD && const_vec_duplicate_p (operands[1], &x))
|
||||
{
|
||||
x = force_reg (GET_MODE_INNER (<MODE>mode), x);
|
||||
operands[1] = gen_vec_duplicate (<MODE>mode, x);
|
||||
emit_move_insn (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
|
||||
/* Expand into a literal load using anchors. */
|
||||
operands[1] = force_const_mem (<MODE>mode, operands[1]);
|
||||
emit_move_insn (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
}
|
||||
"
|
||||
)
|
||||
|
||||
@@ -14349,8 +14349,8 @@ aarch64_select_rtx_section (machine_mode mode,
|
||||
return function_section (current_function_decl);
|
||||
|
||||
/* When using anchors for constants use the readonly section. */
|
||||
if ((CONST_INT_P (x) || CONST_DOUBLE_P (x))
|
||||
&& known_le (GET_MODE_SIZE (mode), 8))
|
||||
if ((CONST_INT_P (x) || CONST_DOUBLE_P (x) || CONST_VECTOR_P (x))
|
||||
&& known_le (GET_MODE_SIZE (mode), 16))
|
||||
return readonly_data_section;
|
||||
|
||||
return default_elf_select_rtx_section (mode, x, align);
|
||||
|
||||
@@ -78,8 +78,8 @@ uint16x8_t f5() {
|
||||
|
||||
/*
|
||||
** f6:
|
||||
** adrp x0, \.LC0
|
||||
** ldr q0, \[x0, #:lo12:\.LC0\]
|
||||
** mov w0, 1333788672
|
||||
** dup v0.4s, w0
|
||||
** ret
|
||||
*/
|
||||
uint32x4_t f6() {
|
||||
|
||||
15
gcc/testsuite/gcc.target/aarch64/pr121240.c
Normal file
15
gcc/testsuite/gcc.target/aarch64/pr121240.c
Normal file
@@ -0,0 +1,15 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mcmodel=small" } */
|
||||
|
||||
const double b[4] = {0.2435334343f, 0.2233535343f, 0.4232433f, 0.34343434f};
|
||||
typedef double v2df __attribute__ ((vector_size (16)));
|
||||
typedef double v2df __attribute__ ((vector_size (16)));
|
||||
|
||||
v2df f (v2df c1, v2df c2)
|
||||
{
|
||||
v2df a1 = *(v2df *)&b[0];
|
||||
v2df a2 = *(v2df *)&b[2];
|
||||
return (a1 * c1) + (a2 * c2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "adrp" 1 } } */
|
||||
@@ -46,9 +46,9 @@ int32x4_t f_s32(int32_t x)
|
||||
|
||||
/*
|
||||
** f_s64:
|
||||
** adrp x[0-9]+, .LC[0-9]+
|
||||
** ldr q0, \[x[0-9]+, #:lo12:.LC[0-9]+\]
|
||||
** ins v0\.d\[0\], x0
|
||||
** fmov d0, x0
|
||||
** mov (x[0-9]+), 1
|
||||
** ins v0\.d\[1\], \1
|
||||
** ret
|
||||
*/
|
||||
|
||||
|
||||
@@ -6,12 +6,14 @@
|
||||
|
||||
/*
|
||||
**test1:
|
||||
** adrp x[0-9]+, .LC[0-9]+
|
||||
** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\]
|
||||
** mov x[0-9]+, 16502
|
||||
** movk x[0-9]+, 0x1023, lsl 16
|
||||
** movk x[0-9]+, 0x4308, lsl 32
|
||||
** movk x[0-9]+, 0x942, lsl 48
|
||||
** dup v[0-9]+.2d, x[0-9]+
|
||||
** add v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d
|
||||
** str q[0-9]+, \[x[0-9]+\]
|
||||
** fmov x[0-9]+, d[0-9]+
|
||||
** orr x[0-9]+, x[0-9]+, x[0-9]+
|
||||
** str q[0-9]+, \[x[0-9]+\]
|
||||
** ret
|
||||
*/
|
||||
|
||||
@@ -27,12 +29,14 @@ test1 (uint64_t a, uint64x2_t b, uint64x2_t* rt)
|
||||
|
||||
/*
|
||||
**test2:
|
||||
** adrp x[0-9]+, .LC[0-1]+
|
||||
** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\]
|
||||
** mov x[0-9]+, 16502
|
||||
** movk x[0-9]+, 0x4223, lsl 16
|
||||
** movk x[0-9]+, 0x3032, lsl 32
|
||||
** movk x[0-9]+, 0x424, lsl 48
|
||||
** dup v[0-9]+.2d, x[0-9]+
|
||||
** add v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d
|
||||
** str q[0-9]+, \[x[0-9]+\]
|
||||
** fmov x[0-9]+, d[0-9]+
|
||||
** orr x[0-9]+, x[0-9]+, x[0-9]+
|
||||
** str q[0-9]+, \[x[0-9]+\]
|
||||
** ret
|
||||
*/
|
||||
|
||||
@@ -48,12 +52,12 @@ test2 (uint64_t a, uint64x2_t b, uint64x2_t* rt)
|
||||
|
||||
/*
|
||||
**test3:
|
||||
** adrp x[0-9]+, .LC[0-9]+
|
||||
** ldr q[0-9]+, \[x[0-9]+, #:lo12:.LC[0-9]+\]
|
||||
** mov w[0-9]+, 16963
|
||||
** movk w[0-9]+, 0x9, lsl 16
|
||||
** dup v[0-9]+.4s, w[0-9]+
|
||||
** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
|
||||
** str q[0-9]+, \[x1\]
|
||||
** fmov w[0-9]+, s[0-9]+
|
||||
** orr w[0-9]+, w[0-9]+, w[0-9]+
|
||||
** str q[0-9]+, \[x1\]
|
||||
** ret
|
||||
*/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user