From c1926449cad66b7e7875f214109950efdb8bb97b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 5 Mar 2026 11:39:38 +0100 Subject: [PATCH] Fix overly restrictive live-lane extraction replacement The following fixes a regression introduced by r11-5542 which restricts replacing uses of live original defs of now vectorized stmts to when that does not require new loop-closed PHIs to be inserted. That restriction keeps the original scalar definition live which is sub-optimal and also not reflected in costing. The particular case the following fixes, which can be seen in gcc.dg/vect/bb-slp-57.c, is the case where we are replacing an existing loop-closed PHI argument. PR tree-optimization/98064 * tree-vect-loop.cc (vectorizable_live_operation): Do not restrict replacing uses in a LC PHI. * gcc.dg/vect/bb-slp-57.c: Verify we do not keep original stmts live. --- gcc/testsuite/gcc.dg/vect/bb-slp-57.c | 1 + gcc/tree-vect-loop.cc | 47 ++++++++++++++++----------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-57.c b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c index 6f13507fd67..6633a3092ad 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-57.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-57.c @@ -36,3 +36,4 @@ void l() /* { dg-final { scan-tree-dump-times "transform load" 1 "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */ /* { dg-final { scan-tree-dump "optimized: basic block" "slp1" { target { { x86_64-*-* i?86-*-* } && lp64 } } } } */ +/* { dg-final { scan-tree-dump-not "missed: Using original scalar computation" "slp1" } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 670a03ea06b..4818a8e88a1 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10441,26 +10441,35 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, "def\n"); continue; } - /* ??? 
It can also happen that we end up pulling a def into - a loop where replacing out-of-loop uses would require - a new LC SSA PHI node. Retain the original scalar in - those cases as well. PR98064. */ - if (TREE_CODE (new_tree) == SSA_NAME - && !SSA_NAME_IS_DEFAULT_DEF (new_tree) - && (gimple_bb (use_stmt)->loop_father - != gimple_bb (vec_stmt)->loop_father) - && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, - gimple_bb (use_stmt)->loop_father)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "Using original scalar computation for " - "live lane because there is an out-of-loop " - "definition for it\n"); - continue; - } FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) - SET_USE (use_p, new_tree); + { + /* ??? It can also happen that we end up pulling a def into + a loop where replacing out-of-loop uses would require + a new LC SSA PHI node. Retain the original scalar in + those cases as well. PR98064. */ + edge e; + if (TREE_CODE (new_tree) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (new_tree) + && (gimple_bb (use_stmt)->loop_father + != gimple_bb (vec_stmt)->loop_father) + /* But a replacement in a LC PHI is OK. This happens + in gcc.dg/vect/bb-slp-57.c for example. */ + && (gimple_code (use_stmt) != GIMPLE_PHI + || (((e = phi_arg_edge_from_use (use_p)), true) + && !loop_exit_edge_p + (gimple_bb (vec_stmt)->loop_father, e))) + && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father, + gimple_bb (use_stmt)->loop_father)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Using original scalar computation for " + "live lane because there is an " + "out-of-loop definition for it\n"); + continue; + } + SET_USE (use_p, new_tree); + } update_stmt (use_stmt); } }