From 772499fd7e2f9acf28d71dfb3a91d4458531608e Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Tue, 27 Jan 2026 12:19:13 -0800 Subject: [PATCH] aarch64: early-ra: Fix handling of multi-register allocation with clobbers [PR123285] The problem here is that while forming chains, we don't process hard register conflicts (and ABI-based ones) for allocnos which are already part of a chain. This means we sometimes allocate a register to a color which might be clobbered over its live range. Processing clobbers for all allocnos while forming a chain does not work, as the candidates of the chain's front allocno do not get updated. So we need to do the processing of clobbers (and ABI clobbers) before starting to form the chains. Changes since v1: * v2: Remove accidental hack which was there just for testing. * v3: Move the copying of the shared part to the new earlier loop too. Fix small white space issue. Bootstrapped and tested on aarch64-linux-gnu. PR target/123285 gcc/ChangeLog: * config/aarch64/aarch64-early-ra.cc (early_ra::form_chains): Process clobbers and ABI clobbers before starting to form the chain. gcc/testsuite/ChangeLog: * gcc.target/aarch64/pr123285-1.c: New test. 
Signed-off-by: Andrew Pinski --- gcc/config/aarch64/aarch64-early-ra.cc | 44 +++++++++++-------- gcc/testsuite/gcc.target/aarch64/pr123285-1.c | 36 +++++++++++++++ 2 files changed, 62 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr123285-1.c diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc index adcb6ca411b..40a30513072 100644 --- a/gcc/config/aarch64/aarch64-early-ra.cc +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -2733,6 +2733,32 @@ early_ra::form_chains () if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nChaining allocnos:\n"); + // Record conflicts of hard register and ABI conflicts before the + // forming of chains so chains have the updated candidates + for (auto *allocno1 : m_allocnos) + { + // Record conflicts with direct uses for FPR hard registers. + auto *group1 = allocno1->group (); + for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr) + if (fpr_conflicts_with_allocno_p (fpr, allocno1)) + group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset)); + + // Record conflicts due to partially call-clobbered registers. + // (Full clobbers are handled by the previous loop.) + for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id) + if (call_in_range_p (abi_id, allocno1->start_point, + allocno1->end_point)) + { + auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size); + group1->fpr_candidates &= ~fprs >> allocno1->offset; + } + if (allocno1->is_shared ()) + { + auto *allocno2 = m_allocnos[allocno1->related_allocno]; + merge_fpr_info (allocno2->group (), group1, allocno2->offset); + } + } + // Perform (modified) interval graph coloring. First sort by // increasing start point. m_sorted_allocnos.reserve (m_allocnos.length ()); @@ -2750,30 +2776,12 @@ early_ra::form_chains () if (allocno1->chain_next != INVALID_ALLOCNO) continue; - // Record conflicts with direct uses for FPR hard registers. 
- auto *group1 = allocno1->group (); - for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr) - if (fpr_conflicts_with_allocno_p (fpr, allocno1)) - group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset)); - - // Record conflicts due to partially call-clobbered registers. - // (Full clobbers are handled by the previous loop.) - for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id) - if (call_in_range_p (abi_id, allocno1->start_point, - allocno1->end_point)) - { - auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size); - group1->fpr_candidates &= ~fprs >> allocno1->offset; - } - if (allocno1->is_shared ()) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Allocno %d shares the same hard register" " as allocno %d\n", allocno1->id, allocno1->related_allocno); - auto *allocno2 = m_allocnos[allocno1->related_allocno]; - merge_fpr_info (allocno2->group (), group1, allocno2->offset); m_shared_allocnos.safe_push (allocno1); continue; } diff --git a/gcc/testsuite/gcc.target/aarch64/pr123285-1.c b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c new file mode 100644 index 00000000000..9ef5a28c9af --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr123285-1.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ +/* PR target/123285 */ + +#define BS_VEC(type, num) type __attribute__((vector_size(num * sizeof(type)))) + +/* f used to allocate v30 to either a or b and the inline-asm + would clobber the v30. */ +[[gnu::noipa]] +BS_VEC(int, 8) f(BS_VEC(int, 8) a, BS_VEC(int, 8) b) +{ + a+=b; + asm("movi v30.16b, 0":::"v30"); + a+=b; + return a; +} +[[gnu::noipa]] +BS_VEC(int, 8) f1(BS_VEC(int, 8) a, BS_VEC(int, 8) b) +{ + a+=b; + a+=b; + return a; +} + +int main() +{ + BS_VEC(int, 8) a = {0,1,2,3,4,5,6,7}; + BS_VEC(int, 8) b = {8,9,10,11,12,13,14}; + BS_VEC(int, 8) c0 = f(a,b); + BS_VEC(int, 8) c1 = f1(a,b); + for(int i=0;i<8;i++) + if ( c0[i] != c1[i] ) + __builtin_abort (); +} + +