aarch64: early-ra: Fix handling of multi-register allocation with clobbers [PR123285]

So the problem here is while forming chains, we don't process hard register
conflicts (and ABI based ones) for allocnos which are already part of a chain.
This means sometimes we allocate a register to a color which might be clobbered
over is live range.
Processing clobbers for all allocnos don't work while forming a chain does
not work as the chain's front allocnos' candidates does not get updated.
So we need to the processing of clobbers (and ABI clobbers) before starting
to form the chains.

Changes since v1:
 * v2: remove accidental hack which was there just for testing.
 * v3: Move the copying of the shared part to new earlier loop too.
       Fix small white space issue.

Bootstrappd and tested on aarch64-linux-gnu.

	PR target/123285

gcc/ChangeLog:

	* config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): Process clobbers
	and ABI clobbers before starting to form the chain.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/pr123285-1.c: New test.

Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
This commit is contained in:
Andrew Pinski
2026-01-27 12:19:13 -08:00
parent 97965bdc1e
commit 772499fd7e
2 changed files with 62 additions and 18 deletions

View File

@@ -2733,6 +2733,32 @@ early_ra::form_chains ()
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\nChaining allocnos:\n");
// Record conflicts of hard register and ABI conflicts before the
// forming of chains so chains have the updated candidates
for (auto *allocno1 : m_allocnos)
{
// Record conflicts with direct uses for FPR hard registers.
auto *group1 = allocno1->group ();
for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
if (fpr_conflicts_with_allocno_p (fpr, allocno1))
group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
// Record conflicts due to partially call-clobbered registers.
// (Full clobbers are handled by the previous loop.)
for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
if (call_in_range_p (abi_id, allocno1->start_point,
allocno1->end_point))
{
auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
group1->fpr_candidates &= ~fprs >> allocno1->offset;
}
if (allocno1->is_shared ())
{
auto *allocno2 = m_allocnos[allocno1->related_allocno];
merge_fpr_info (allocno2->group (), group1, allocno2->offset);
}
}
// Perform (modified) interval graph coloring. First sort by
// increasing start point.
m_sorted_allocnos.reserve (m_allocnos.length ());
@@ -2750,30 +2776,12 @@ early_ra::form_chains ()
if (allocno1->chain_next != INVALID_ALLOCNO)
continue;
// Record conflicts with direct uses for FPR hard registers.
auto *group1 = allocno1->group ();
for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
if (fpr_conflicts_with_allocno_p (fpr, allocno1))
group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
// Record conflicts due to partially call-clobbered registers.
// (Full clobbers are handled by the previous loop.)
for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
if (call_in_range_p (abi_id, allocno1->start_point,
allocno1->end_point))
{
auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
group1->fpr_candidates &= ~fprs >> allocno1->offset;
}
if (allocno1->is_shared ())
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " Allocno %d shares the same hard register"
" as allocno %d\n", allocno1->id,
allocno1->related_allocno);
auto *allocno2 = m_allocnos[allocno1->related_allocno];
merge_fpr_info (allocno2->group (), group1, allocno2->offset);
m_shared_allocnos.safe_push (allocno1);
continue;
}

View File

@@ -0,0 +1,36 @@
/* { dg-do run } */
/* { dg-options "-O3" } */
/* PR target/123285 */
#define BS_VEC(type, num) type __attribute__((vector_size(num * sizeof(type))))
/* f used to allocate v30 to either a or b and the inline-asm
would clobber the v30. */
[[gnu::noipa]]
BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
{
a+=b;
asm("movi v30.16b, 0":::"v30");
a+=b;
return a;
}
[[gnu::noipa]]
BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b)
{
a+=b;
a+=b;
return a;
}
int main()
{
BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7};
BS_VEC(int, 8) b = {8,9,10,11,12,13,14};
BS_VEC(int, 8) c0 = f(a,b);
BS_VEC(int, 8) c1 = f1(a,b);
for(int i=0;i<8;i++)
if ( c0[i] != c1[i] )
__builtin_abort ();
}