tree-optimization/124068 - fix missed AVX2 vectorization of shift

The following fixes a regression in AVX2 vectorization: on
trunk we now correctly determine that we can shorten a shift operation,
but we never really bothered to check whether we can implement the
resulting operation.  With the patch we now check this.  For shifts
and rotates we have the choice between vector-vector and vector-scalar
operations which in the end depends on whether we perform SLP or not
and how the shift operand matches up.  The patch heuristically
assumes that constant or external shifts can be handled by vector-scalar
operations.

As the reason we were not checking for target support was to allow
recursive matching of other patterns, the following still errs on that
side in case the original operation was not supported by the target or it
is binary and the 2nd operand is a constant.  This helps avoid regressions in
gcc.dg/vect/vect-over-widen-13.c and gcc.dg/vect/vect-div-bitmask-1.c
and gcc.target/aarch64/sve2/div-by-bitmask_1.c where the operation in
question is integer division.

	PR tree-optimization/124068
	* tree-vect-patterns.cc (target_has_vecop_for_code): Move
	earlier, add defaulted optab_subtype parameter.
	(vect_recog_over_widening_pattern): Check that the target
	supports the narrowed operation before committing to the
	pattern.

	* gcc.target/i386/vect-shift-1.c: New testcase.
This commit is contained in:
Richard Biener
2026-02-18 13:46:38 +01:00
parent e47f44074a
commit cdc4d4ada2
2 changed files with 46 additions and 16 deletions

View File

@@ -0,0 +1,10 @@
/* { dg-do compile { target avx2 } } */
/* { dg-options "-O2 -mavx2 -mno-avx512f -fdump-tree-vect-details" } */
/* Shift each of the 16 elements of ACC left by its own index.  The loop
   counter is an unsigned char while the data is short; the dg-final
   scan in this file requires the loop to be reported as vectorized.  */
void f (short* acc)
{
for (unsigned char row = 0; row < 16; ++row)
acc[row] = acc[row] << row;
}
/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */

View File

@@ -1036,6 +1036,17 @@ vect_reassociating_reduction_p (vec_info *vinfo,
return true;
}
/* Query whether the target can perform tree code CODE on vectors of type
   VECTYPE.  SUBTYPE is passed through to optab_for_tree_code to select
   the optab and defaults to optab_vector.  Returns false when no optab
   is found for the combination or when can_implement_p rejects it for
   the mode of VECTYPE.  */
static bool
target_has_vecop_for_code (tree_code code, tree vectype,
                           enum optab_subtype subtype = optab_vector)
{
  optab op = optab_for_tree_code (code, vectype, subtype);
  return op ? can_implement_p (op, TYPE_MODE (vectype)) : false;
}
/* match.pd function to match
(cond (cmp@3 a b) (convert@1 c) (convert@2 d))
with conditions:
@@ -3160,16 +3171,36 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
&& (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
op_type = build_nonstandard_integer_type (new_precision, true);
/* We specifically don't check here whether the target supports the
new operation, since it might be something that a later pattern
wants to rewrite anyway. If targets have a minimum element size
for some optabs, we should pattern-match smaller ops to larger ops
where beneficial. */
tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
if (!new_vectype || !op_vectype)
return NULL;
/* Verify we can handle the new operation.  For shifts and rotates apply
   a heuristic for whether we are likely facing a vector-vector or a
   vector-scalar operation: a constant or external shift amount is
   assumed to be implementable by the vector-scalar optab as well.
   Since a later pattern might still want to rewrite an unsupported
   operation into a supported one, err on the side of applying this
   pattern when the original operation was not supported either, or
   when this is a binary operation whose 2nd operand is constant.  */
if (code == RSHIFT_EXPR || code == LSHIFT_EXPR || code == RROTATE_EXPR)
  {
    /* Computed once so the narrowed-type and original-type checks below
       cannot disagree on what counts as an invariant shift amount.  */
    const bool invariant_amount_p
      = (unprom[1].dt == vect_external_def
         || unprom[1].dt == vect_constant_def);

    /* Give up when the narrowed shift is unsupported (neither as
       vector-vector nor, for an invariant amount, as vector-scalar)
       while the original shift is supported by the same criteria.  */
    if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
        && (!invariant_amount_p
            || !target_has_vecop_for_code (code, op_vectype, optab_scalar))
        && !(!target_has_vecop_for_code (code, *type_out, optab_vector)
             && (!invariant_amount_p
                 || !target_has_vecop_for_code (code, *type_out,
                                                optab_scalar))))
      return NULL;
  }
/* Other operations: give up when the narrowed operation is unsupported
   but the original one is, unless this is a binary operation with a
   constant 2nd operand.  */
else if (!target_has_vecop_for_code (code, op_vectype, optab_vector)
         && (target_has_vecop_for_code (code, *type_out, optab_vector)
             && !(nops == 2 && unprom[1].dt == vect_constant_def)))
  return NULL;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
type, new_type);
@@ -4151,17 +4182,6 @@ vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
return pattern_stmt;
}
/* Query whether the target can perform tree code CODE on vectors of type
   VECTYPE.  The optab is always looked up with the optab_vector subtype.
   Returns false when no optab is found or when can_implement_p rejects
   it for the mode of VECTYPE.  */
static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab vector_optab = optab_for_tree_code (code, vectype, optab_vector);
  if (!vector_optab)
    return false;
  return can_implement_p (vector_optab, TYPE_MODE (vectype));
}
/* Verify that the target has optabs of VECTYPE to perform all the steps
needed by the multiplication-by-immediate synthesis algorithm described by
ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is