i386: Use x >> ~y for x >> 31-y [PR36503]

x86 targets mask the count of 32-bit shifts to 5 bits (and the count of
64-bit shifts to 6 bits).  Since ~y == 31 - y modulo 32 (and ~y == 63 - y
modulo 64), they can use x >> ~y instead of x >> (31 - y).

The optimization converts:

	movl    $31, %ecx
        subl    %esi, %ecx
        sall    %cl, %eax

to:
        notl    %ecx
        sall    %cl, %eax

	PR target/36503

gcc/ChangeLog:

	* config/i386/i386.md (*<insn:any_shift><mode:SWI48>3_sub):
	Also allow operands[3] & (<mode_bitsize>-1) == (<mode_bitsize>-1)
	in insn condition. Emit NOT RTX instead of NEG RTX in this case.
	(*<insn:any_shift><mode:SWI48>3_sub_1): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr36503-5.c: New test.
	* gcc.target/i386/pr36503-6.c: New test.
This commit is contained in:
Uros Bizjak
2026-01-28 21:57:47 +01:00
parent cb0d29db88
commit b20e68022a
3 changed files with 58 additions and 11 deletions

View File

@@ -18266,24 +18266,27 @@
(match_operand 2 "int248_register_operand" "c,r")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
&& ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|| (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1))
== <MODE_SIZE> * BITS_PER_UNIT - 1)
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel
[(set (match_dup 4)
(neg:QI (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 0)
(any_shift:SWI48 (match_dup 1)
(match_dup 4)))
(clobber (reg:CC FLAGS_REG))])]
{
HOST_WIDE_INT cnt = INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
operands[2] = gen_lowpart (QImode, operands[2]);
operands[4] = gen_reg_rtx (QImode);
rtx (*insn)(rtx, rtx) = (cnt == 0) ? gen_negqi2 : gen_one_cmplqi2;
emit_insn (insn (operands[4], operands[2]));
}
[(set_attr "isa" "*,bmi2")])
@@ -18296,20 +18299,25 @@
(match_operand:QI 2 "register_operand" "c,r"))))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
&& ((INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
|| (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1))
== <MODE_SIZE> * BITS_PER_UNIT - 1)
&& ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel
[(set (match_dup 4)
(neg:QI (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 0)
(any_shift:SWI48 (match_dup 1)
(match_dup 4)))
(clobber (reg:CC FLAGS_REG))])]
"operands[4] = gen_reg_rtx (QImode);"
{
HOST_WIDE_INT cnt = INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[4] = gen_reg_rtx (QImode);
rtx (*insn)(rtx, rtx) = (cnt == 0) ? gen_negqi2 : gen_one_cmplqi2;
emit_insn (insn (operands[4], operands[2]));
}
[(set_attr "isa" "*,bmi2")])
(define_insn_and_split "*extend<dwi>2_doubleword_highpart"

View File

@@ -0,0 +1,20 @@
/* PR target/36503 */
/* { dg-do compile } */
/* { dg-options "-O2 -masm=att" } */
/* { dg-additional-options "-mregparm=3" { target ia32 } } */
/* { dg-final { scan-assembler-not "movl\[ \\t\]+\\\$31" } } */
/* Left shift of a signed int by the variable count (31 - N).  The dg-final
   directive in this file requires that the generated assembly contains no
   "movl $31", i.e. that the constant 31 is not materialized for the
   subtraction.  */
int foo (int i, int n)
{
return i << (31 - n);
}
/* Right shift of a signed int by the variable count (31 - N).  Covered by
   the same "no movl $31" assembly scan as the other functions in this
   file.  */
int bar (int i, int n)
{
return i >> (31 - n);
}
/* Right shift of an unsigned int by the variable count (31 - N).  Covered
   by the same "no movl $31" assembly scan as the other functions in this
   file.  */
unsigned int baz (unsigned int i, int n)
{
return i >> (31 - n);
}

View File

@@ -0,0 +1,19 @@
/* PR target/36503 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -masm=att" } */
/* { dg-final { scan-assembler-not "movl\[ \\t\]+\\\$63" } } */
/* Left shift of a signed long long by the variable count (63 - N).  The
   dg-final directive in this file requires that the generated assembly
   contains no "movl $63", i.e. that the constant 63 is not materialized
   for the subtraction.  */
long long foo (long long i, int n)
{
return i << (63 - n);
}
/* Right shift of a signed long long by the variable count (63 - N).
   Covered by the same "no movl $63" assembly scan as the other functions
   in this file.  */
long long bar (long long i, int n)
{
return i >> (63 - n);
}
/* Right shift of an unsigned long long by the variable count (63 - N).
   Covered by the same "no movl $63" assembly scan as the other functions
   in this file.  */
unsigned long long baz (unsigned long long i, int n)
{
return i >> (63 - n);
}