[PATCH v3] tree-optimization: lower mempcpy to memcpy when result is unused [PR93556]

This patch allows the GIMPLE folder to transform __builtin_mempcpy into
__builtin_memcpy in cases where the return value is ignored. This is beneficial
because most targets have an efficient implementation for memcpy.

Existing tests that relied on the unfolded mempcpy have been duplicated: one
version now expects the folded call (memcpy), and the other uses the return
value of mempcpy, which intentionally prevents the folding from happening.

Bootstrapped and regression tested on x86_64-linux-gnu.

	PR tree-optimization/93556

gcc/ChangeLog:

	* gimple-fold.cc (gimple_fold_builtin_mempcpy): New function.
	(gimple_fold_builtin): Handle BUILT_IN_MEMPCPY.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr79223.c: Rename to gcc.dg/pr79223-1.c and update scans.
	* gcc.dg/tree-prof/val-prof-7.c: Rename to
	gcc.dg/tree-prof/val-prof-7-1.c and update scans.
	* gcc.dg/tree-ssa/builtins-folding-gimple-3.c: Update scans.
	* gcc.dg/builtin-mempcpy-1.c: New test.
	* gcc.dg/builtin-mempcpy-2.c: New test.
	* gcc.dg/pr79223-2.c: New test.
	* gcc.dg/tree-prof/val-prof-7-2.c: New test.
	* gcc.dg/tree-ssa/builtins-folding-gimple-4.c: New test.

Signed-off-by: Netanel Komm <netanelkomm@gmail.com>
This commit is contained in:
Netanel Komm
2026-04-27 07:14:46 -06:00
committed by Jeff Law
parent 7f4cc8140e
commit 4765a2e398
9 changed files with 248 additions and 6 deletions

View File

@@ -3339,6 +3339,32 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator *gsi)
return true;
}
/* Try to simplify the mempcpy call statement at GSI.  Return true if the
   statement was replaced and false if it was left untouched.

   The only transformation performed is mempcpy -> memcpy, and only when
   the call has no LHS, i.e. the returned pointer is ignored.  */

static bool
gimple_fold_builtin_mempcpy (gimple_stmt_iterator *gsi)
{
  gcall *call = as_a <gcall *> (gsi_stmt (*gsi));

  /* A call whose result is consumed must stay a mempcpy.  */
  if (gimple_call_lhs (call))
    return false;

  /* Give up if no memcpy declaration is available.  */
  tree memcpy_decl = builtin_decl_explicit (BUILT_IN_MEMCPY);
  if (memcpy_decl == NULL_TREE)
    return false;

  /* Forward the destination, source and length operands unchanged.  */
  gcall *memcpy_call
    = gimple_build_call (memcpy_decl, 3,
			 gimple_call_arg (call, 0),
			 gimple_call_arg (call, 1),
			 gimple_call_arg (call, 2));
  replace_call_with_call_and_fold (gsi, memcpy_call);
  return true;
}
/* Fold a call EXP to {,v}snprintf having NARGS passed as ARGS. Return
NULL_TREE if a normal call should be emitted rather than expanding
the function inline. FCODE is either BUILT_IN_SNPRINTF_CHK or
@@ -5387,8 +5413,12 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
return gimple_fold_builtin_memset (gsi,
gimple_call_arg (stmt, 1),
gimple_call_arg (stmt, 2));
case BUILT_IN_MEMCPY:
case BUILT_IN_MEMPCPY:
if (gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 1), fcode))
return true;
return gimple_fold_builtin_mempcpy (gsi);
case BUILT_IN_MEMCPY:
case BUILT_IN_MEMMOVE:
return gimple_fold_builtin_memory_op (gsi, gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 1), fcode);

View File

@@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-fdump-tree-lower" } */
/* Basic MRE (minimal reproducible example) from the bug report: the
   pointer returned by __builtin_mempcpy is discarded.  The scan directive
   that follows this function expects "__builtin_memcpy" to appear in the
   "lower" tree dump.  */
void test_bare (void *d, const void *s, __SIZE_TYPE__ n) {
__builtin_mempcpy (d, s, n);
}
/* { dg-final { scan-tree-dump "__builtin_memcpy" "lower" } } */

View File

@@ -0,0 +1,39 @@
/* { dg-do compile } */
/* { dg-options "-O1 -fdump-tree-optimized" } */
/* Indirectly unused result: the returned pointer is stored in A and copied
   to B, but neither local is read afterwards.  The scan counts at the end
   of this file expect two __builtin_memcpy calls in the "optimized" dump;
   presumably this function accounts for one of them -- confirm against the
   dump.  */
void test_unused_indirect (void *d, const void *s, __SIZE_TYPE__ n) {
void *a = __builtin_mempcpy (d, s, n);
void *b = a;
}
/* Simple used result (in statement): the pointer returned by
   __builtin_mempcpy is handed straight back to the caller, so the result
   is live.  Presumably one of the three __builtin_mempcpy calls expected
   by the scan counts at the end of this file -- confirm against the dump.  */
void *test_used_simple (void *d, const void *s, __SIZE_TYPE__ n) {
return __builtin_mempcpy (d, s, n);
}
/* More complicated used result (in expression): the returned pointer is
   cast to char * and D is subtracted from it; that difference is the
   function's return value, so the result is consumed by an expression
   rather than returned directly.  Presumably also counted among the
   expected __builtin_mempcpy calls -- confirm against the dump.  */
__SIZE_TYPE__ test_used_in_expr (char *d, const char *s, __SIZE_TYPE__ n) {
return (char *)__builtin_mempcpy (d, s, n) - d;
}
/* Unused in all paths: A receives the mempcpy result but is never read.
   Both return statements yield constants cast to void * (20 when N > 20,
   otherwise 7), so no path depends on the returned pointer.  Presumably the
   second of the two __builtin_memcpy calls expected by the scan counts at
   the end of this file -- confirm against the dump.  */
void *test_unused_indirect2 (void *d, const void *s, __SIZE_TYPE__ n) {
void *a = __builtin_mempcpy (d, s, n);
if (n > 20) {
return (void *)20;
}
return (void *)7;
}
/* Used in at least one path: A is returned when N > 20, and a null pointer
   is returned otherwise, so the mempcpy result is live on one of the two
   paths.  Presumably the third __builtin_mempcpy call expected by the scan
   counts at the end of this file -- confirm against the dump.  */
void *test_maybe_used (void *d, const void *s, __SIZE_TYPE__ n) {
void *a = __builtin_mempcpy (d, s, n);
if (n > 20) {
return a;
}
return (void *)0;
}
/* { dg-final { scan-tree-dump-times "__builtin_memcpy" 2 "optimized" } } */
/* { dg-final { scan-tree-dump-times "__builtin_mempcpy" 3 "optimized" } } */

View File

@@ -28,7 +28,7 @@ void test_memcpy (void)
/* Call mempcpy with a length from range () and discard the result.
   NOTE(review): this looks like a diff hunk whose +/- markers were lost;
   the two call lines below appear to be the old and new versions of a
   single line, differing only in whether the expected warning names
   mempcpy or memcpy -- confirm against the actual file.  */
void test_mempcpy (void)
{
mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing 4 or more bytes into a region of size 3 overflows the destination" } */
mempcpy (d, s, range ()); /* { dg-warning ".memcpy. writing 4 or more bytes into a region of size 3 overflows the destination" } */
}
void test_memmove (void)

View File

@@ -0,0 +1,38 @@
/* PR middle-end/79223 - missing -Wstringop-overflow on a memmove overflow
{ dg-do compile }
{ dg-additional-options "-O2 -Wall -Wno-array-bounds -std=gnu99" } */
typedef __SIZE_TYPE__ size_t;
extern void* memcpy (void*, const void*, size_t);
extern void* mempcpy (void*, const void*, size_t);
extern void* memmove (void*, const void*, size_t);
char d[3];
char s[4];
void *sink;
/* Return a byte count taken from the external function size (), replaced
   by sizeof d + 1 whenever size () yields a value no larger than sizeof d.
   D is declared above as char[3], so the result is always at least 4,
   which matches the "4 or more bytes" wording of the expected warnings
   below.  */
size_t range (void)
{
extern size_t size ();
size_t n = size ();
if (n <= sizeof d)
return sizeof d + 1;
return n;
}
/* Copy range () bytes (at least 4) from S into the 3-byte array D with
   memcpy, ignoring the result.  The directive on the call line expects an
   overflow warning that names memcpy.  */
void test_memcpy (void)
{
memcpy (d, s, range ()); /* { dg-warning ".memcpy. writing 4 or more bytes into a region of size 3 overflows the destination" } */
}
/* Copy range () bytes (at least 4) from S into the 3-byte array D with
   mempcpy.  Unlike the other tests in this file, the returned pointer is
   stored in the global SINK, so the result is used; the directive on the
   call line expects the overflow warning to name mempcpy.  */
void test_mempcpy (void)
{
sink = mempcpy (d, s, range ()); /* { dg-warning ".mempcpy. writing 4 or more bytes into a region of size 3 overflows the destination" } */
}
/* Move range () bytes (at least 4) within D, from D to D + 1.  Starting at
   D + 1 leaves only 2 of D's 3 bytes for the destination, hence the
   "region of size 2" in the expected warning.  */
void test_memmove (void)
{
memmove (d + 1, d, range ()); /* { dg-warning ".memmove. writing 4 or more bytes into a region of size 2 overflows the destination" } */
}

View File

@@ -0,0 +1,82 @@
/* { dg-options "-O2 -fdump-ipa-profile-optimized -mtune=core2" } */
/* { dg-skip-if "" { ! { i?86-*-* x86_64-*-* } } } */
char *buffer1;
char *buffer2;
/* Bzero is not tested because it gets transformed into memset. */
/* DEFINE_TEST (N) expands to one family of functions whose names end in N:

   - memcpy_test_N, mempcpy_test_N, memset_test_N and memmove_test_N are
     noinline wrappers that each make a single builtin call on the global
     buffers with length LEN.  All four discard the builtin's result.
   - test_stringops_N calls the four wrappers with the same LEN.
   - test_stringops_with_values_N calls test_stringops_N 1000 times: the 9
     iterations with i > 990 pass NOT_COMMON, the other 991 pass COMMON.

   NOTE(review): the scan directives at the end of this file look for
   BUILT_IN_MEMCPY where a mempcpy entry would otherwise be expected;
   presumably that is because the mempcpy result is unused here -- confirm
   against the profile dump.  */
#define DEFINE_TEST(N) \
__attribute__((noinline)) \
void memcpy_test_ ## N (int len) \
{ \
__builtin_memcpy (buffer1, buffer2, len); \
} \
\
__attribute__((noinline)) \
void mempcpy_test_ ## N (int len) \
{ \
__builtin_mempcpy (buffer1, buffer2, len); \
} \
\
__attribute__((noinline)) \
void memset_test_ ## N (int len) \
{ \
__builtin_memset (buffer1, 'c', len); \
} \
__attribute__((noinline)) \
void memmove_test_ ## N (int len) \
{ \
__builtin_memmove (buffer1, buffer2, len); \
} \
\
void test_stringops_ ## N(int len) \
{ \
memcpy_test_## N (len); \
mempcpy_test_ ## N (len); \
memset_test_ ## N (len); \
memmove_test_ ## N (len); \
} \
\
void test_stringops_with_values_ ## N (int common, int not_common) \
{ \
int i; \
for (i = 0; i < 1000; i++) \
{ \
if (i > 990) \
test_stringops_ ## N (not_common); \
else \
test_stringops_ ## N (common); \
} \
}
DEFINE_TEST(0);
DEFINE_TEST(1);
DEFINE_TEST(2);
/* Allocate the two 1000-byte global buffers (the malloc results are not
   checked and the buffers are never freed) and drive the three DEFINE_TEST
   instances.  The common length is 8 for instance 0, 55 for instance 1 and
   257 for instance 2; the uncommon length is 55 in every case.  */
int main() {
buffer1 = __builtin_malloc (1000);
buffer2 = __builtin_malloc (1000);
test_stringops_with_values_0 (8, 55);
test_stringops_with_values_1 (55, 55);
test_stringops_with_values_2 (257, 55);
return 0;
}
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: single value 32 stringop for BUILT_IN_MEMCPY" 0 "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 8 stringop for BUILT_IN_MEMCPY" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 55 stringop for BUILT_IN_MEMCPY" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: single value 32 stringop for BUILT_IN_MEMPCPY" 0 "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 8 stringop for BUILT_IN_MEMSET" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 55 stringop for BUILT_IN_MEMSET" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: single value 32 stringop for BUILT_IN_MEMSET" 0 "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 8 stringop for BUILT_IN_MEMMOVE" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: single value 55 stringop for BUILT_IN_MEMMOVE" "profile" } } */
/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Transformation done: single value 32 stringop for BUILT_IN_MEMMOVE" 0 "profile" } } */

View File

@@ -16,7 +16,7 @@ void memcpy_test_ ## N (int len) \
__attribute__((noinline)) \
void mempcpy_test_ ## N (int len) \
{ \
__builtin_mempcpy (buffer1, buffer2, len); \
void * volatile res = __builtin_mempcpy (buffer1, buffer2, len); \
} \
\
__attribute__((noinline)) \

View File

@@ -38,6 +38,5 @@ AR (memmove, int, 2, INT_MAX, 1);
AR (mempcpy, short, 2, SHRT_MAX, 1);
AR (mempcpy, int, 2, INT_MAX, 1);
/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
{ dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
{ dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } } */
/* { dg-final { scan-tree-dump-times "builtin_memcpy" 10 "optimized" } }
{ dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } } */

View File

@@ -0,0 +1,45 @@
/* PR 81908 - FAIL: gfortran.dg/alloc_comp_auto_array_2.f90 -O3 -g -m32
Test to verify that calls to memcpy et al. where the size is in a range
with more than one valid value are not eliminated (this test complements
builtins-folding-gimple-2.c).
{ dg-do compile }
{ dg-options "-O2 -Wall -fdump-tree-optimized" } */
#define SHRT_MAX __SHRT_MAX__
#define SHRT_MIN (-SHRT_MAX - 1)
#define INT_MAX __INT_MAX__
#define INT_MIN (-INT_MAX - 1)
/* Paste FUNC and LINE into an identifier of the form test_<func>_<line>.  */
#define UNIQUE_FUNCNAME(func, line) test_ ## func ## _ ## line
/* Extra level of expansion so that an argument such as __LINE__ is
   replaced by its value before UNIQUE_FUNCNAME pastes it.  */
#define FUNCNAME(func, line) UNIQUE_FUNCNAME (func, line)
/* Receives the result of every builtin call made by the AR functions, so
   the return value of each call is used.  */
void *sink;
/* Define a noclone, noinline function named after FUNC and the line of
   the AR invocation.  If its argument N, of type TYPE, lies in the closed
   range [(TYPE)MIN, (TYPE)MAX], N is replaced by VAL; __builtin_FUNC
   (d, s, n) is then called and its result stored in SINK.  The trailing
   typedef consumes the semicolon written after each AR (...) invocation.  */
#define AR(func, type, min, max, val) \
void __attribute__ ((noclone, noinline)) \
FUNCNAME (func, __LINE__) (char *d, const char *s, type n) \
{ \
if ((type)min <= n && n <= (type)max) \
n = val; \
sink = __builtin_ ## func (d, s, n); \
} typedef void DummyType
AR (memcpy, short, SHRT_MIN, 0, 1);
AR (memcpy, short, SHRT_MIN, 1, 2);
AR (memcpy, short, 2, SHRT_MAX, 1);
AR (memcpy, int, INT_MIN, 0, 1);
AR (memcpy, int, INT_MIN, 1, 2);
AR (memcpy, int, INT_MIN, 2, 3);
AR (memcpy, int, 2, INT_MAX, 1);
AR (memcpy, int, 2, INT_MAX, 1);
AR (memmove, short, 2, SHRT_MAX, 1);
AR (memmove, int, 2, INT_MAX, 1);
AR (mempcpy, short, 2, SHRT_MAX, 1);
AR (mempcpy, int, 2, INT_MAX, 1);
/* { dg-final { scan-tree-dump-times "builtin_memcpy" 8 "optimized" } }
{ dg-final { scan-tree-dump-times "builtin_memmove" 2 "optimized" } }
{ dg-final { scan-tree-dump-times "builtin_mempcpy" 2 "optimized" } } */