x86_cse: Use integer load for CONST_VECTOR load

A CONST_VECTOR load no larger than an integer register

(set (reg:V2QI 294)
     (const_vector:V2QI [(const_int 0 [0]) repeated x2]))

can use an integer load.  Use the inner mode as the scalar mode for the
CONST_VECTOR load source.

gcc/

	PR target/125009
	* config/i386/i386-features.cc (ix86_place_single_vector_set):
	Support CONST_VECTOR load no larger than integer register.
	(ix86_broadcast_inner): Use inner mode as the scalar mode for
	CONST_VECTOR load source.
	(pass_x86_cse::x86_cse): Generate CONST_VECTOR broadcast source
	for CONST_VECTOR load no larger than integer register.

gcc/testsuite/

	PR target/125009
	* g++.target/i386/pr125009.C: New test.
	* gcc.target/i386/pr125009.c: Likewise.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
This commit is contained in:
H.J. Lu
2026-04-24 06:39:44 +08:00
parent 799e24aded
commit d16bee7da5
3 changed files with 83 additions and 4 deletions

View File

@@ -3321,7 +3321,16 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
}
}
if (load && load->kind == X86_CSE_VEC_DUP)
/* CONST_VECTOR load no larger than integer register
(set (reg:V2QI 294)
(const_vector:V2QI [(const_int 0 [0]) repeated x2]))
can use integer load. */
if (load
&& load->kind == X86_CSE_VEC_DUP
&& (!CONST_VECTOR_P (src)
|| GET_MODE_SIZE (GET_MODE (dest)) > UNITS_PER_WORD))
{
/* Get the source from LOAD as (reg:SI 99) in
@@ -3758,7 +3767,10 @@ ix86_broadcast_inner (rtx op, machine_mode mode,
if (!rtx_equal_p (tmp, first))
return nullptr;
}
*scalar_mode_p = GET_MODE (first);
/* Use the inner mode to handle
(const_vector:V2QI [(const_int 0 [0]) repeated x2])
*/
*scalar_mode_p = GET_MODE_INNER (mode);
*insn_p = nullptr;
return first;
}
@@ -4731,8 +4743,23 @@ pass_x86_cse::x86_cse (void)
broadcast_source = CONSTM1_RTX (mode);
break;
case X86_CSE_VEC_DUP:
reg = gen_reg_rtx (load->mode);
broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
if (CONST_INT_P (load->val)
&& GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
{
/* CONST_VECTOR load no larger than integer
register size can use integer load. */
int nunits = GET_MODE_NUNITS (mode);
rtvec v = rtvec_alloc (nunits);
for (int j = 0; j < nunits ; j++)
RTVEC_ELT (v, j) = load->val;
broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
}
else
{
reg = gen_reg_rtx (load->mode);
broadcast_source = gen_rtx_VEC_DUPLICATE (mode,
reg);
}
break;
default:
gcc_unreachable ();

View File

@@ -0,0 +1,31 @@
/* { dg-do compile } */
/* { dg-options "-mtune=generic -O2 -std=c++11" } */
/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
/* Declaration of the placement form of operator new, used by the
   placement-new expression in hash_map::put.  No definition appears in
   this file.  */
void *operator new(__SIZE_TYPE__, void *);
/* Minimal class template whose only member, put, copy-constructs its
   argument into the storage of a local variable.  */
template <typename Value> struct hash_map {
/* Copy-construct V into the storage of the local int E via placement
   new.  The function returns nothing and keeps no other state.  */
void put(Value v) {
int e;
new (&e) Value(v);
}
};
/* Struct whose only member is an array of unspecified bound.  STRING is
   the global object whose first character is tested in
   init_attr_rdwr_indices.  */
struct tree_string {
char str[];
} string;
/* Global integer; init_attr_rdwr_indices assigns it to the bool field
   attr_access::static_p, i.e. it is converted to bool there.  */
long long minsize;
/* Aggregate made of two consecutive bool members.  __trans_tmp_1 is a
   global instance that init_attr_rdwr_indices overwrites on every loop
   iteration.
   NOTE(review): presumably the two adjacent bool members are what makes
   the compiler form the two-element QImode vector described in the
   commit message -- confirm against the PR.  */
struct attr_access {
bool internal_p;
bool static_p;
} __trans_tmp_1;
/* Global hash_map instantiated on attr_access; its put member is called
   from init_attr_rdwr_indices.  */
hash_map<attr_access> init_attr_rdwr_indices_rwm;
/* Body of the reduced testcase.  Loops forever; each iteration builds an
   attr_access value, copies it to the global __trans_tmp_1 and passes it
   by value to init_attr_rdwr_indices_rwm.put.  */
void init_attr_rdwr_indices() {
for (;;) {
/* Value-initialize, so both bool members start out false.  */
attr_access acc{};
/* Only when the first character of string.str is nonzero are the two
   flags overwritten.  */
if (*string.str) {
acc.internal_p = true;
/* Implicit long long -> bool conversion of the global.  */
acc.static_p = minsize;
}
__trans_tmp_1 = acc;
init_attr_rdwr_indices_rwm.put(acc);
}
}

View File

@@ -0,0 +1,21 @@
/* { dg-do compile { target fpic } } */
/* { dg-options "-mtune=generic -O2 -fPIC" } */
/* { dg-additional-options "-march=pentiumpro" { target ia32 } } */
/* Descriptor made of two consecutive bool flags; both are cleared by
   gomp_map_vars_internal.  */
struct target_var_desc {
bool always_copy_from;
bool is_attach;
};
/* Container whose only member is a flexible array of target_var_desc
   entries.  */
struct target_mem_desc {
struct target_var_desc list[];
};
/* Body of the reduced testcase.  Clears both flags of tgt->list[j] to
   false for j in [0, L) and then, continuing from the same J, for the
   remaining j below N.  */
void
gomp_map_vars_internal (int n, int l)
{
/* TGT is never assigned before it is dereferenced below; this file is a
   compile-only test (dg-do compile), so the function is never run.  */
struct target_mem_desc *tgt;
int j;
/* First loop: entries 0 .. L-1.  The chained assignment stores false to
   is_attach and then to always_copy_from.  */
for (j = 0; j < l; j++)
tgt->list[j].always_copy_from = tgt->list[j].is_attach = false;
/* Second loop: J is deliberately not reset, so this picks up where the
   first loop stopped and runs while j < N.  */
for (;j < n; j++)
tgt->list[j].always_copy_from = tgt->list[j].is_attach = false;
}